{
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Data Preparing"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Install FinRL"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!pip install wrds\n",
    "!pip install swig\n",
    "!pip install tushare\n",
    "!pip install -q condacolab\n",
    "import condacolab\n",
    "condacolab.install()\n",
    "!apt-get update -y -qq && apt-get install -y -qq cmake libopenmpi-dev python3-dev zlib1g-dev libgl1-mesa-glx swig\n",
    "!pip install git+https://github.com/AI4Finance-Foundation/FinRL.git"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Install other libraries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!pip install stockstats\n",
    "!pip install tushare\n",
    "#install talib\n",
    "!wget http://prdownloads.sourceforge.net/ta-lib/ta-lib-0.4.0-src.tar.gz \n",
    "!tar xvzf ta-lib-0.4.0-src.tar.gz\n",
    "import os\n",
    "os.chdir('ta-lib') \n",
    "!./configure --prefix=/usr\n",
    "!make\n",
    "!make install\n",
    "#!sudo make install # Sometimes it need root \n",
    "os.chdir('../')\n",
    "!pip install TA-Lib"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%cd /\n",
    "!git clone https://github.com/AI4Finance-Foundation/FinRL-Meta\n",
    "%cd /FinRL-Meta/"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Processing data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "from meta.data_processor import DataProcessor \n",
    "from meta import config "
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Configs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tushare successfully connected\n"
     ]
    }
   ],
   "source": [
    "ticker_list = ['600000.SH', '600009.SH', '600016.SH', '600028.SH', '600030.SH', '600031.SH', '600036.SH', '600050.SH', '600104.SH', '600196.SH', '600276.SH', '600309.SH', '600519.SH', '600547.SH', '600570.SH']\n",
    "\n",
    "TRAIN_START_DATE = '2015-01-01' \n",
    "TRAIN_END_DATE= '2019-08-01' \n",
    "TRADE_START_DATE = '2019-08-01' \n",
    "TRADE_END_DATE = '2020-01-03'\n",
    "\n",
    "TIME_INTERVAL = \"1d\" \n",
    "kwargs = {} \n",
    "kwargs['token'] = '27080ec403c0218f96f388bca1b1d85329d563c91a43672239619ef5' \n",
    "p = DataProcessor(data_source='tushare', start_date=TRAIN_START_DATE, end_date=TRADE_END_DATE, time_interval=TIME_INTERVAL, **kwargs)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Download data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 15/15 [00:23<00:00,  1.55s/it]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Download complete! Dataset saved to ./data/dataset.csv. \n",
      "Shape of DataFrame: (17960, 8)\n",
      "Shape of DataFrame:  (18315, 8)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "p.download_data(ticker_list=ticker_list)\n",
    "p.clean_data()\n",
    "p.fillna()"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Add technical indicator"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tech_indicator_list:  ['macd', 'boll_ub', 'boll_lb', 'rsi_30', 'cci_30', 'dx_30', 'close_30_sma', 'close_60_sma']\n",
      "indicator:  macd\n",
      "indicator:  boll_ub\n",
      "indicator:  boll_lb\n",
      "indicator:  rsi_30\n",
      "indicator:  cci_30\n",
      "indicator:  dx_30\n",
      "indicator:  close_30_sma\n",
      "indicator:  close_60_sma\n",
      "Succesfully add technical indicators\n",
      "Shape of DataFrame:  (18270, 17)\n"
     ]
    }
   ],
   "source": [
    "p.add_technical_indicator(config.INDICATORS) \n",
    "p.fillna()"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Data split & save data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "train.shape: (16695, 17)\n"
     ]
    }
   ],
   "source": [
    "train = p.data_split(p.dataframe, TRAIN_START_DATE, TRAIN_END_DATE) \n",
    "train.to_csv(\"./train.csv\")\n",
    "print(f\"train.shape: {train.shape}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "trade.shape: (1560, 17)\n"
     ]
    }
   ],
   "source": [
    "trade = p.data_split(p.dataframe, TRADE_START_DATE, TRADE_END_DATE) \n",
    "trade.to_csv(\"./trade_data.csv\")\n",
    "print(f\"trade.shape: {trade.shape}\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "finrl",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.12"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "afd6dc03c9be451573fc2885de79a969af6a24a159f11a3ead741ab7a9ff405f"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
